
require(tidyverse)
require(estimatr)
require(Synth)
require(scpi)




####################################################################################################
####################################################################################################
#
#         Containing Large-Scale Criminal Violence through Internationalized Prosecution: 
#              How the Collaboration between the CICIG and Guatemala’s Law Enforcement
#                  Contributed to a Sustained Reduction in the Murder Rate
#         
#                                             - appendix -
#
#                             Guillermo Trejo                   Camilo Nieto-Matiz
#                               gtrejo@nd.edu                 camilo.nieto-matiz@utsa.edu
#                 
####################################################################################################
####################################################################################################





#load data
load("guatemala data.rda")



################################################################
#                APPENDIX B. Data description
################################################################


#========================================================= 
#                     Figure B.3
#         Distribution of the outcome variables
#=========================================================

# homicide rates
dat %>%
  filter(country %in% c("Honduras", "Panama",  "Venezuela", "Costa Rica", "Bolivia", 
                        "Dominican Republic", "Nicaragua", "Guatemala")) %>%
  ggplot(aes(homrates_oms))+
  geom_histogram(bins=25)+
  theme_bw()+
  xlab('Homicide rates')+ylab('')

# human rights violations
dat %>%
  filter(country %in% c("Honduras", "Panama",  "Venezuela", "Costa Rica", "Bolivia",  
                        "Dominican Republic", "Nicaragua", "Guatemala")) %>%
  ggplot(aes(theta_mean_rev))+
  geom_histogram(bins=25)+
  theme_bw()+
  xlab('Human rights violations')+ylab('')




#========================================================= 
#                     Figure B.4
#     Percentage change in homicide rates, 2000–2008
#=========================================================

ch <- dat %>% filter(year==2002|year==2008) %>% select(country,year,homrates_oms) %>% 
  group_by(country) %>%
  dplyr::summarise(pct = -(first(homrates_oms) - last(homrates_oms))/ first(homrates_oms) * 100) %>% 
  filter(!country %in% c('Haiti'))


ggplot(ch, aes(reorder(country, pct), pct))+
  geom_bar(fill="darkblue",stat="identity")+
  theme_bw() + theme(axis.text.x = element_text(angle = 25, vjust = 1, hjust=1)) +
  ylab('% change in homicide rates (2000-2008)')+xlab('')





#========================================================= 
#                       Figure B.5
#           Donor pool: homicide rates over time
#=========================================================

dat %>% select(country,id,year,homrates_oms) %>% filter(year<2009) %>% 
  filter(country %in% c("Guatemala","Honduras", "Panama",  "Venezuela", "Bolivia", 
                        "Dominican Republic", "Nicaragua", "Costa Rica")) %>%
  ggplot(aes(year, homrates_oms))+
  geom_line(aes(group=country))+
  scale_size_manual( values = c(0.2,0.8))+
  theme_bw()+
  theme(legend.position="none")+
  scale_color_manual(values=c("grey70", "grey20"))+
  ylab('')+xlab('')+
  facet_wrap(~country,scale="free_y",ncol = 3)
 


#========================================================= 
#                       Figure B.6
#                  Predictors over time
#=========================================================

# convert to long format
long.data <- dat %>% select(country,id,year,homrates_oms,loggdp, logimr, growth, 
                              ginimkt, schooling2,theta_mean_rev,logpop,v2x_libdem,v2juhcind) %>%
  filter(country %in% c( "Guatemala","Honduras", "Panama",  "Venezuela", "Costa Rica",  
                         "Dominican Republic", "Nicaragua", "Bolivia") & year<2009) %>%
  pivot_longer(cols=c(homrates_oms:v2juhcind))  %>%
  mutate(guat=ifelse(country=='Guatemala',1,0))


v.lab <- c(
  `homrates_oms` = "Homicide rates",
  `loggdp` = "GDP per capita",
  `logimr` = "Infant mortality rate",
  `schooling2` = "Primary school enrollment",
  `ginimkt` = "Inequality - Gini",
  `theta_mean_rev` = "Human rights violations",
  `growth` = "GDP growth",
  `logpop`= 'Population',
  `v2x_libdem`= 'Democracy',
  `v2juhcind`= 'Judicial independence'
)


ggplot(long.data, aes(year, value))+
  geom_line(aes(group=country, color=factor(guat), size=factor(guat)))+
  scale_size_manual( values = c(0.2,0.8))+
  geom_vline(xintercept=2008, linetype=2, color="red")+theme_bw()+
  theme(legend.position="none")+
  scale_color_manual(values=c("grey70", "grey20"))+
  ylab('')+xlab('')+
  facet_wrap(~name, scales="free_y", ncol = 2, labeller = as_labeller(v.lab))+
  scale_x_continuous(expand = c(0, 0))

  

################################################################
#             APPENDIX C. Outcome 1: Homicide Rates
################################################################


#============================================= 
#           C.1 MAIN synthetic control
#=============================================

# create matrix for GUATEMALA
dataprep.out<-
  dataprep(
    foo = dat,
    predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2"),
    predictors.op = "mean",
    dependent = "homrates_oms",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("homrates_oms", c(2005:2007), c("mean"))
    ),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica", 
                            "Dominican Republic", "Nicaragua", "Bolivia"),
    time.predictors.prior = c(2003:2007),
    time.optimize.ssr = c(2003:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )

synth.out <- synth(data.prep.obj=dataprep.out) 

synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out)


#============================================= 
#                 Table C.1 -a
#           MAIN Model - Synthetic weights
#============================================= 
synth.tables$tab.w

#============================================= 
#                 Table C.1 -b
#         MAIN Model - Weight by predictor
#============================================= 
synth.tables$tab.v 

#==================================================================== 
#                 Table C.2
#     MAIN Model -  Diff between treated units and synthetic control
#====================================================================
synth.tables$tab.pred




#============================================= 
#       C.2 ROBUSTNESS synthetic control
#=============================================

# create matrix for GUATEMALA
dataprep.out.r<-
  dataprep(
    foo = dat, 
    predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2", "logpop", "v2juhcind"),
    predictors.op = "mean",
    dependent = "homrates_oms",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("homrates_oms", c(2005:2007), c("mean"))
    ),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica",  
                            "Dominican Republic", "Nicaragua", "Bolivia"),
    time.predictors.prior = c(2003:2007),
    time.optimize.ssr = c(2003:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )

synth.out.r <- synth(data.prep.obj=dataprep.out.r) 

synth.tables.r <- synth.tab(
  dataprep.res = dataprep.out.r,
  synth.res = synth.out.r)


#============================================= 
#                 Table C.3 -a
#      ROBUSTNESS Model - Synthetic weights
#============================================= 
synth.tables.r$tab.w

#============================================= 
#                 Table C.3 -b
#         ROBUSTNESS Model - Weight by predictor
#============================================= 
synth.tables.r$tab.v 

#================================================ 
#                  Table C.3
#     ROBUSTNESS Model -  Diff between treated 
#          units and synthetic control
#================================================
synth.tables.r$tab.pred



#============================================= 
#                  Figure  
#        ROBUSTNESS synthetic plot
#============================================= 

synth_plot <-as_tibble(cbind(dataprep.out.r$Y1plot, dataprep.out.r$Y0plot%*%synth.out.r$solution.w, 2002:2019)) %>% 
  dplyr::rename(synth_g=w.weight, hom_g=`10`, year=V3) %>%
  pivot_longer(cols=c(hom_g, synth_g),
               names_to = "Variable",
               values_to= "Value")

ggplot(synth_plot, aes(y=Value, x=year, color=Variable, linetype=Variable))+
  geom_line(size=1) +
  ylim(10,80) + theme_bw() + 
  #geom_point(data=synth_plot, aes(x=year, y=Value, colour=Variable, shape=Variable)) +
  scale_color_manual(values=c("black", "blue"), labels = c("Guatemala", "Synthetic Guatemala")) + 
  scale_linetype_manual(values = c(1, 2), labels = c("Guatemala", "Synthetic Guatemala")) +
  theme(legend.position = c(0.2,0.15),
        legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent"))+
  scale_x_continuous(expand = c(0, 0), breaks = c(2002, 2004,2006, 2008, 2010, 2012,2014,2016,2018)) +
  geom_vline(xintercept=2008, color="red", linetype=2)  +
  labs(y="Homicide Rates", x="")




#============================================= 
#       C.3  Additional placebo tests
#=============================================

#============================================= 
#                    Figure
#            In-time placebo (2005)
#=============================================

time.placebo<-
  dataprep(
    foo = dat,
    predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2"),
    predictors.op = "mean",
    dependent = "homrates_oms",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("homrates_oms", c(2002:2004), c("mean"))
    ),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica",  
                            "Dominican Republic", "Nicaragua", "Bolivia"),
    time.predictors.prior = c(2002:2004),
    time.optimize.ssr = c(2002:2004),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )

synth.time <- synth(data.prep.obj=time.placebo) 

synth.tables.time <- synth.tab(
  dataprep.res = time.placebo,
  synth.res = synth.time)


## plot
synth.time <-as_tibble(cbind(time.placebo$Y1plot, time.placebo$Y0plot%*%synth.time$solution.w, 2002:2019)) %>% 
  dplyr::rename(synth_g=w.weight, hom_g=`10`, year=V3) %>%
  pivot_longer(cols=c(hom_g, synth_g),
               names_to = "Variable",
               values_to= "Value")

ggplot(synth.time, aes(y=Value, x=year, col=Variable,linetype=Variable))+
  geom_line(size=1) +
  ylim(10,80) + theme_bw() + 
  scale_color_manual(values=c("black", "blue"), labels = c("Guatemala", "Synthetic Guatemala")) + 
  scale_linetype_manual(values = c(1, 2), labels = c("Guatemala", "Synthetic Guatemala")) +
  theme(legend.position = c(0.2,0.15),
        legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent"))+
  scale_x_continuous(expand = c(0, 0), breaks = c(2002, 2004,2006, 2008, 2010, 2012,2014,2016,2018)) +
  geom_vline(xintercept=2005, color="red", linetype=2)  +
  labs(y="Homicide Rates", x="")




#============================================= 
#                    Figure
#            In-space placebo (Honduras)
#=============================================

space.placebo<-
  dataprep(
    foo = dat,
    predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2"),
    predictors.op = "mean",
    dependent = "homrates_oms",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("homrates_oms", c(2005:2007), c("mean"))
    ),
    treatment.identifier = "Honduras",
    controls.identifier = c("Guatemala",  "Panama",  "Venezuela", "Costa Rica",  
                            "Dominican Republic", "Nicaragua", "Bolivia"),
    time.predictors.prior = c(2003:2007),
    time.optimize.ssr = c(2003:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )

synth.space <- synth(data.prep.obj=space.placebo)

synth.tables.space <- synth.tab(
  dataprep.res = space.placebo,
  synth.res = synth.space)


## plot
synth.space <-as_tibble(cbind(space.placebo$Y1plot, space.placebo$Y0plot%*%synth.space$solution.w, 2002:2019)) %>% 
  dplyr::rename(synth_g=w.weight, hom_g=`12`, year=V3) %>%
  pivot_longer(cols=c(hom_g, synth_g),
               names_to = "Variable",
               values_to= "Value")

ggplot(synth.space, aes(y=Value, x=year, color=Variable, linetype=Variable))+
  geom_line(size=1) +
  theme_bw() +
  scale_color_manual(values=c("black", "blue"), labels = c("Honduras", "Synthetic Honduras")) + 
  scale_linetype_manual(values = c(1, 2), labels = c("Honduras", "Synthetic Honduras")) +
  theme(legend.position = c(0.2,0.15),
        legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent"))+
  ylim(20,100)+
  scale_x_continuous(expand = c(0, 0), breaks = c(2002, 2004,2006, 2008, 2010, 2012,2014,2016,2018)) +
  geom_vline(xintercept=2008, color="red", linetype=2)  +
  labs(y="Homicide Rates", x="")





#====================================================== 
#   C.4 Homicide Rates - Additional Prediction Intervals
#======================================================

### See "CPS prediction intervals - internationalized prosecution and criminal violence.R" 
### for all code pertaining to prediction intervals.




#====================================================== 
#       C.6   Difference-in-differences approach
#======================================================

### DD estimator
summary(lm_robust(homrates_oms~post*treat, clusters = country,
                  fixed_effects = country~year, data=dat))





################################################################
#        APPENDIX D. Outcome 2: Human rights violations
################################################################


#============================================= 
#           D.1 MAIN synthetic control
#=============================================

# create matrix for GUATEMALA
dataprep.out.hr<-
  dataprep(
    foo = dat,
    predictors = c("logpop", "ginimkt", "loggdp", "v2x_libdem", "growth"),
    predictors.op = "mean",
    dependent = "theta_mean_rev",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("theta_mean_rev", c(2005:2007), c("mean"))),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica",  
                            "Dominican Republic", "Nicaragua"),
    time.predictors.prior = c(2002:2007),
    time.optimize.ssr = c(2002:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )

synth.out.hr <- synth(data.prep.obj=dataprep.out.hr)

synth.tables.hr <- synth.tab(
  dataprep.res = dataprep.out.hr,
  synth.res = synth.out.hr)


#============================================= 
#                 Table D.1 -a
#           MAIN Model - Synthetic weights
#============================================= 
synth.tables.hr$tab.w

#============================================= 
#                 Table D.1 -b
#         MAIN Model - Weight by predictor
#============================================= 
synth.tables.hr$tab.v 

#==================================================================== 
#                 Table D.2
#     MAIN Model -  Diff between treated units and synthetic control
#====================================================================
synth.tables.hr$tab.pred

#plot 
path.plot(synth.res = synth.out.hr,
          dataprep.res = dataprep.out.hr, Ylim = c(-1,1))




#============================================= 
#       D.2  Additional placebo tests
#=============================================

#============================================= 
#                   Figure
#            In-time placebo (2005)
#=============================================

time.placebo.hr<-
  dataprep(
    foo = dat,
    predictors = c("logpop", "ginimkt", "loggdp", "v2x_libdem", "growth"),
    predictors.op = "mean",
    dependent = "theta_mean_rev",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("theta_mean_rev", c(2002:2004), c("mean"))),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica",  
                            "Dominican Republic", "Nicaragua"),
    time.predictors.prior = c(2002:2004),
    time.optimize.ssr = c(2002:2004),
    unit.names.variable = "country",
    time.plot = c(2002:2019))

synth.time.hr <- synth(data.prep.obj=time.placebo.hr)

synth.tables.time <- synth.tab(
  dataprep.res = time.placebo.hr,
  synth.res = synth.time.hr)



# plot
synth.time.hr <-as_tibble(cbind(time.placebo.hr$Y1plot, time.placebo.hr$Y0plot%*%synth.time.hr$solution.w, 2002:2019)) %>% 
  dplyr::rename(synth_g=w.weight, hom_g=`10`, year=V3) %>%
  pivot_longer(cols=c(hom_g, synth_g),
               names_to = "Variable",
               values_to= "Value")

ggplot(synth.time.hr, aes(y=Value, x=year, color=Variable, linetype=Variable))+
  geom_line(size=1) +
  ylim(-1.2,1) + theme_bw() + scale_x_continuous(expand = c(0, 0)) +
  geom_point(data=synth.time.hr, aes(x=year, y=Value, colour=Variable, shape=Variable)) +
  theme(legend.position = c(0.2,0.15), 
        legend.background = element_rect(fill = "transparent"))+
  labs(y="Human Rights Violations", x="", color="") + 
  scale_color_manual(values=c("black", "blue"), labels = c("Guatemala", "Synthetic Guatemala")) +
  scale_linetype_manual(values = c("solid","longdash")) +
  guides(linetype = "none", shape="none") +
  geom_vline(xintercept=2005, color="red", linetype=2)



#============================================= 
#                    Figure
#            In-space placebo (Honduras)
#=============================================


space.placebo.hr<-
  dataprep(
    foo = dat,
    predictors = c("logpop", "ginimkt", "loggdp", "v2x_libdem", "growth"),
    predictors.op = "mean",
    dependent = "theta_mean_rev",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("theta_mean_rev", c(2005:2007), c("mean"))),
    treatment.identifier = "Honduras",
    controls.identifier = c( "Panama",  "Venezuela", "Costa Rica",
                             "Dominican Republic", "Nicaragua", "Guatemala"),
    time.predictors.prior = c(2002:2007),
    time.optimize.ssr = c(2002:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019))

synth.space.hr <- synth(data.prep.obj=space.placebo.hr)

synth.tables.space <- synth.tab(
  dataprep.res = space.placebo.hr,
  synth.res = synth.space.hr)
 


## plot
synth.space.hr <-as_tibble(cbind(space.placebo.hr$Y1plot, space.placebo.hr$Y0plot%*%synth.space.hr$solution.w, 2002:2019)) %>% 
  dplyr::rename(synth_g=w.weight, hom_g=`12`, year=V3) %>%
  pivot_longer(cols=c(hom_g, synth_g),
               names_to = "Variable",
               values_to= "Value")

ggplot(synth.space.hr, aes(y=Value, x=year, color=Variable, linetype=Variable))+
  geom_line(size=1) +
  ylim(-1.2,1) + theme_bw() + scale_x_continuous(expand = c(0, 0)) +
  geom_point(data=synth.space.hr, aes(x=year, y=Value, colour=Variable, shape=Variable)) +
  theme(legend.position = c(0.2,0.15), 
        legend.background = element_rect(fill = "transparent"))+
  labs(y="Human Rights Violations", x="", color="") + 
  scale_color_manual(values=c("black", "blue"), labels = c("Honduras", "Synthetic Honduras")) +
  scale_linetype_manual(values = c("solid","longdash")) +
  guides(linetype = "none", shape="none") +
  geom_vline(xintercept=2008, color="red", linetype=2)





#=============================================================== 
#                        Figure D.3
#   Permutation Test: HR Gaps in Guatemala vs. All Countries
#===============================================================

#create new matrix to store values
dat_pt.hr <- dat %>%
  filter(country %in% c(c("Guatemala", "Honduras", "Panama",  "Venezuela", "Costa Rica",  
                          "Dominican Republic", "Nicaragua"))) %>%
  transform(id=as.numeric(factor(id)))
store.gu.hr <- matrix(NA,length(2002:2019),length(unique(dat_pt.hr$country)))
colnames(store.gu.hr) <- unique(dat_pt.hr$country)


# run placebo test
for(iter in 1:length(unique(dat_pt.hr$country)))
{
  dataprep.gu.hr<-
    dataprep(
      foo = dat_pt.hr,
      predictors = c("logpop", "ginimkt", "loggdp", "v2x_libdem", "growth"),
      predictors.op = "mean",
      dependent = "theta_mean_rev",
      unit.variable = "id",
      time.variable = "year",
      special.predictors = list(
        list("theta_mean_rev", c(2005:2007), c("mean"))),
      treatment.identifier = iter,
      controls.identifier = c(1:(length(unique(dat_pt.hr$country))-1))[-iter],
      time.predictors.prior = c(2003:2007),
      time.optimize.ssr = c(2003:2007),
      unit.names.variable = "country",
      time.plot = c(2002:2019)
    )
  
  
  # run synth
  synth.out.gu.hr <- synth(
    data.prep.obj = dataprep.gu.hr,
    method = "BFGS"
  )
  
  # store gaps
  store.gu.hr[,iter] <- dataprep.gu.hr$Y1plot - (dataprep.gu.hr$Y0plot %*% synth.out.gu.hr$solution.w)
}


# convert to long format 
perm.dat.hr <-   as.data.frame(store.gu.hr)  %>% 
  mutate(year=2002:2019) %>% 
  pivot_longer(cols=c("Costa Rica":"Venezuela"),
               names_to='Country',
               values_to='Values') %>% 
  mutate(treat=ifelse(Country=="Guatemala","Guatemala","Control countries")) 


# plot
perm.dat.hr %>%
  filter(!Country %in% c('Venezuela'))%>% #countries with pre cicig MSPE 5 times larger that Guatemala's
  ggplot(aes(year, Values, group=Country, size=treat, color=treat))+
  geom_line()  + 
  scale_color_manual(values=c("gray80", "blue"),labels=c("Other countries","Guatemala")) +
  scale_size_manual(values=c(.5,1.2),labels=c("Other countries","Guatemala"))+
  labs(x="", y="Human Rights Violations Gaps") +
  geom_hline(yintercept = 0, linetype=1, color="gray30")+
  geom_vline(xintercept = 2008, linetype=2, color="red")+
  theme_bw()+
  theme(legend.position = c(0.2,0.15), 
        legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent"),
        axis.text.x=element_text(size=rel(0.8)))+
  ylim(-1.2,1.5) + scale_x_continuous(expand = c(0, 0), breaks=seq(2002,2018,2)) 



#=============================================================== 
#                        Figure D.4
#          Ratio of post-CICIG RMSPE to pre-CICIG RMSPE
#===============================================================

# compute ratio of post-CICIG RMSPE to pre-CICIG RMSPE                                                  
rmse <- function(x){sqrt(mean(x^2))}
rmspe <- data.frame(rmspe = apply(store.gu.hr[7:18,],2,rmse)/apply(store.gu.hr[1:6,],2,rmse),
                    country = colnames(store.gu.hr)) %>%
  mutate(country=fct_reorder(country,rmspe))


#plot
ggplot(rmspe) +
  geom_point(aes(y=country, x=rmspe), color="blue", size=2) + 
  theme_bw() + labs(y="",x="Post-CICIG to Pre-CICIG RMSPE Ratio") +
  geom_vline(xintercept=median(rmspe$rmspe), color="red", linetype="dashed")



#=============================================================== 
#   Figure D.5 Human Rights - Additional Prediction Intervals
#===============================================================

### See "CPS prediction intervals - internationalized prosecution and criminal violence.R" 
### for all code pertaining to prediction intervals.

